# NOTE: the original `rm(list = ls())` call was removed. It only deletes
# objects from the global environment (attached packages and options() are
# left as they were), so it does not produce a clean session, and it silently
# wipes the caller's workspace if this script is sourced. Restart R to get a
# genuinely fresh session instead.
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Raise/limit the number of entries R prints to the console.
options(max.print = 1000)
# Read the survey records from the project's data directory.
surveys_complete <- read_csv(file = "data/surveys_complete.csv")
## Parsed with column specification:
## cols(
## record_id = col_double(),
## month = col_double(),
## day = col_double(),
## year = col_double(),
## plot_id = col_double(),
## species_id = col_character(),
## sex = col_character(),
## hindfoot_length = col_double(),
## weight = col_double(),
## genus = col_character(),
## species = col_character(),
## taxa = col_character(),
## plot_type = col_character()
## )
# Inspect the freshly loaded table with view().
# NOTE(review): presumably only useful in an interactive session — confirm
# before running this script non-interactively.
view(surveys_complete)
Data Visualization with ggplot2
Basic Template: ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()
Use the ggplot() function: bind the plot to a specific data frame using the data argument, define an aesthetic mapping with aes(), and add a geom to the plot using the + operator. Since we have two continuous variables, let's use geom_point() (the + operator lets you add to or modify an existing ggplot object).
# Scatter plot of hindfoot length against weight.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point()
You can also assign the plot to a variable and then draw it with the following code:
# Store the base plot (data and aesthetics only, no layers yet) ...
surveys_plot <- ggplot(surveys_complete, aes(weight, hindfoot_length))
# ... then draw it by adding a point layer with `+`.
surveys_plot + geom_point()
Challenge - Scatter Plots
Using geom_hex() (which requires the hexbin package), hexagons are assigned colors based on the number of observations that fall within their boundaries (hexagonal binning).
# Attach hexbin before drawing the hexagonal-bin layer.
library(hexbin)
# Rebuild the base plot, then draw it with hexagonal bins instead of points.
surveys_plot <- ggplot(surveys_complete, aes(weight, hindfoot_length))
surveys_plot + geom_hex()
Strength: the color coding shows how concentrated the data are in each area.
Weakness: because the data are binned into hexagons, you see a condensed version of the data and lose information about nuances such as outliers.
Building Plots Iteratively
# Same scatter plot with mostly transparent blue points.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(colour = "blue", alpha = 0.1)
or we can color each species in the plot differently
# Map point colour to species_id inside the point layer.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(aes(colour = species_id), alpha = 0.1)
Challenge - Iterative Plots
create a scatter plot of weight over species_id with the plot types showing in different colors
# Weight per species, with points coloured by plot type.
challenge_plot <- ggplot(surveys_complete, aes(species_id, weight))
challenge_plot + geom_point(aes(colour = plot_type))
Box Plot
visualize the distribution of weight within each species
# One boxplot of weight per species_id.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot()
Add points to the boxplot to get a better idea of the number of measurements and of their distribution.
# Boxplots with alpha = 0, then jittered tomato-coloured points on top.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3)
Challenge - Box Plots
Unlike a boxplot, a violin plot (sometimes called a bean plot) also shows the shape of the distribution, i.e. the density of the points.
# Violin outlines plus jittered points, with weight on a log10 y axis.
ggplot(surveys_complete, aes(species_id, weight)) +
  scale_y_log10() +
  geom_violin(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3)
try exploring other variables
# Hindfoot length per species: faint jittered points first, then points
# coloured by plot_id (read as a numeric column), then red boxplots on top.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_jitter(alpha = 0.1) +
  geom_point(aes(colour = plot_id)) +
  geom_boxplot(colour = "red")
# Treat plot_id as a categorical variable by converting it to a factor inside
# the aesthetic, so each plot gets its own discrete colour.
# The original wrapped the column in a redundant c() (`factor(c(plot_id))`);
# it has been dropped, which also gives the legend the cleaner title
# "factor(plot_id)".
ggplot(data = surveys_complete,
       mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.1, aes(color = factor(plot_id))) +
  geom_boxplot(color = "gray")
When plot_id is converted from an integer to a factor, a legend appears on the right-hand side showing each color and the plot number it corresponds to.
Plotting time series data
Calculate the number of records per year for each genus.
To do this, we need to group the data and count the records within each group.
# Number of records for every year/genus combination (stored in column `n`).
yearly_counts <- count(surveys_complete, year, genus)
# First attempt at a time series: a single line through all rows.
ggplot(yearly_counts, aes(year, n)) +
  geom_line()
# That does not work because every genus is drawn as part of the same line.
# Adding `group = genus` to the aesthetics draws one line per genus.
ggplot(yearly_counts, aes(year, n, group = genus)) +
  geom_line()
# Mapping colour to genus instead also gives each genus its own colour.
ggplot(yearly_counts, aes(year, n, colour = genus)) +
  geom_line()
The pipe operator %>% can also be used to pass the data argument to the ggplot() function.
Remember that plot layers are still added with +, not with the pipe operator.
# Pipe the counts in as the data argument; layers are still added with `+`.
yearly_counts %>%
  ggplot(aes(year, n, colour = genus)) +
  geom_line()
# Chain the counting step straight into the plot and keep the result.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(year, n, colour = genus)) +
  geom_line()
# Evaluate the stored plot at top level to draw it.
yearly_counts_graph
Faceting allows the user to split one plot into multiple plots based on a factor included in the dataset.
We will use it to make a time series plot for each genus.
# One panel per genus, each holding that genus's yearly counts.
ggplot(yearly_counts, aes(year, n)) +
  geom_line() +
  facet_wrap(~ genus)
# Split the line in each panel by the sex of the individuals measured.
# Step 1: count the records per year, genus and sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)
# Step 2: facet by genus as before and use colour to separate the sexes.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus)
# Grid of panels: one row per sex, one column per genus.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(sex ~ genus)
# Panels can also be laid out along a single direction.
# Rows only: a single column with one row per genus.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(genus ~ .)
# Columns only: a single row with one column per genus.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(. ~ genus)
Every component of a ggplot graph can be customized using the generic theme() function.
Change the previous graph to have a simpler white background using the theme_bw() function.
# Genus panels coloured by sex, drawn with the black-and-white theme.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  theme_bw()
for a complete list of themes visit https://ggplot2.tidyverse.org/reference/ggtheme.html
Challenge - Create a plot
create a plot that depicts how the average weight of each species changes through the years
# Mean weight for every species/year combination.
yearly_weight <- surveys_complete %>%
  group_by(species_id, year) %>%
  # `.groups = "drop"` returns an ungrouped tibble. Without it the result
  # stays grouped by species_id (which silently affects later dplyr verbs) and
  # summarise() prints a "regrouping output by 'species_id'" message.
  summarize(avg_weight = mean(weight), .groups = "drop")
# Average weight through the years, one panel per species.
ggplot(yearly_weight, aes(year, avg_weight)) +
  geom_line() +
  facet_wrap(~ species_id) +
  theme_bw()
customization
# Give the plot a title and readable axis names.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()
# Same plot with a larger base font for all text elements.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))
# Rotate the x-axis labels by 90 degrees so they no longer overlap, and set
# the facet strip text in italics.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    strip.text = element_text(face = "italic"),
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12)
  )
# Keep the text settings in a reusable theme object so they can be added to
# any plot with `+` (this stores the settings; it does not change the
# session-wide default theme).
grey_theme <- theme(
  text = element_text(size = 16),
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12)
)
# Apply the stored theme to a boxplot of hindfoot length per species.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_boxplot() +
  grey_theme
Challenge - Make your own beautiful graph using the ggplot2 cheat sheet
I am going to build on the one-row, facet-by-column graph created earlier.
# Starting point: one row of genus panels, lines coloured by sex.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(. ~ genus)
# Changed version: step lines instead of straight segments, descriptive
# labels, and smaller, rotated axis text with italic facet strips.
ggplot(data = yearly_sex_counts,
       mapping = aes(x = year, y = n, color = sex)) +
  geom_step() +
  facet_grid(cols = vars(genus)) +
  # The panels are split by genus (see facet_grid above), so the title says
  # "Genus" rather than "Species"; the spelling of "Individuals" is corrected
  # in both the title and the y-axis label.
  labs(title = "Individuals of Each Sex Grouped by Genus",
       x = "Year",
       y = "Number of Individuals") +
  theme(axis.text.x = element_text(colour = "grey20", size = 10, angle = 90,
                                   hjust = 0.5, vjust = 0.5),
        axis.text.y = element_text(colour = "grey20", size = 10),
        strip.text = element_text(face = "italic"),
        text = element_text(size = 10))
Arranging and exporting plots
# combine plots into a single figure
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Boxplot of weight per species on a log10 y axis, with a plotmath axis label.
# The original ended with an empty `labs()` call, which adds no labels and had
# no effect on the plot; it has been removed.
spp_weight_boxplot <- ggplot(data = surveys_complete,
                             aes(x = species_id, y = weight)) +
  geom_boxplot() +
  labs(x = "Species",
       y = expression(log[10](Weight))) +
  scale_y_log10()
# Yearly counts per genus as coloured lines.
spp_count_plot <- ggplot(yearly_counts, aes(year, n, colour = genus)) +
  geom_line() +
  labs(y = "Abundance", x = "Year")
# Draw both plots side by side with a 4:6 width ratio.
grid.arrange(
  spp_weight_boxplot,
  spp_count_plot,
  ncol = 2,
  widths = c(4, 6)
)
# Build the final faceted figure, then write it to disk with an explicit
# width and height.
my_plot <- ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12)
  )
ggsave(filename = "name_of_file.png", plot = my_plot, width = 15, height = 10)
## This also works for grid.arrange() plots
# Keep the arranged figure returned by grid.arrange() so it can be passed to
# ggsave().
combo_plot <- grid.arrange(spp_weight_boxplot, spp_count_plot, ncol = 2,
                           widths = c(4, 6))
# Give the height explicitly. When it is omitted, ggsave() takes the missing
# dimension from the current graphics device, so the saved image depended on
# whatever plot window the session happened to have (5 in the recorded run,
# which reported "Saving 10 x 5 in image"). With both dimensions set the file
# is reproducible and that message is no longer printed.
ggsave("combo_plot_abun_weight.png", combo_plot,
       width = 10, height = 5, dpi = 300)